/*==============================================================================
FILE NAME: Figure_6.do
CREATED: 12 June 2025
==============================================================================*/

**Figure 6

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data" 
	global figures "$rootdir/output/figures"
	global point_estimates "$figures/point_estimates"
	// Define global paths for replication package
} 
*/

//Panel A: Cat A as outcome, complaint air inv as treatment
set scheme modern
* Load main dataset
use "$processed_data/Air_Panel.dta", clear

* Drop facilities never inspected for air complaints
drop if never_air_inv == 1

* Define time variable (year-month)
egen t = group(year month)
xtset RN_id t

* Generate difference variables
forv h = 0/12 {
	gen p_air_catA_`h' = f`h'.p_air_catA - l1.p_air_catA
}

forv h = 2/12 {
	gen p_air_catA_neg`h' = l`h'.p_air_catA - l1.p_air_catA
}

* Generate facility-year variable
egen RN_year = group(RN year)

* Prepare storage variables
cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0

* Run regressions for association between likelihood of a category A violation before and after a complaint
foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catA_`h' p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
replace b = _b[p_air_complaint_inv] if Years == `h'
replace se = _se[p_air_complaint_inv] if Years == `h'
replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == `h'
replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == `h'
}
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catA_neg`h'  p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
replace b = _b[p_air_complaint_inv] if Years == -`h'
replace se = _se[p_air_complaint_inv] if Years == -`h'
replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == -`h'
replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == -`h'
}

* Keep if year is included
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_6_Panel_A.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(black) lpattern(solid) lwidth(thin)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.01)0.04, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Category A Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)


* Export figure
graph export "$figures/Figure_6_Panel_A.pdf", replace


* Panel C
clear
use "$processed_data/Air_Panel.dta", clear

* Drop facilities never inspected for air complaints
drop if never_air_inv == 1

* Define time variable (year-month)
egen t = group(year month)
xtset RN_id t

* Generate outcome variables
forv h = 0/12 {
	gen p_air_catB_`h' = f`h'.p_air_catB - l1.p_air_catB
}

forv h = 2/12 {
	gen p_air_catB_neg`h' = l`h'.p_air_catB - l1.p_air_catB
}

* Generate facility-year variable
egen RN_year = group(RN year)

* Prepare storage variables
cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0

* Run regressions for association between likelihood of a category B violation before and after a complaint

foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catB_`h' p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
replace b = _b[p_air_complaint_inv] if Years == `h'
replace se = _se[p_air_complaint_inv] if Years == `h'
replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == `h'
replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == `h'
}
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catB_neg`h'  p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
replace b = _b[p_air_complaint_inv] if Years == -`h'
replace se = _se[p_air_complaint_inv] if Years == -`h'
replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == -`h'
replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == -`h'
}

* Keep if year is included
keep if Years != .
keep b u d se Years Zero

* Export the point estimates 
export delimited "$point_estimates/Point_Estimates_Figure_6_Panel_C.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(black) lpattern(solid) lwidth(thin)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.1)0.4, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Category B Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)


* Create figure
graph export "$figures/Figure_6_Panel_C.pdf", replace

* Panel E
clear
use "$processed_data/Air_Panel.dta", clear

* Drop facilities never inspected for air complaints
drop if never_air_inv == 1

* Define time variable (year-month)
egen t = group(year month)
xtset RN_id t

* Generate outcome variables
forv h = 0/12 {
	gen p_air_catC_`h' = f`h'.p_air_catC - l1.p_air_catC
}

forv h = 2/12 {
	gen p_air_catC_neg`h' = l`h'.p_air_catC - l1.p_air_catC
}

* Generate fixed effect: facility-year
egen RN_year = group(RN year)

* Create variables
cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0

* Run regressions for association between likelihood of a category C violation before and after a complaint

foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catC_`h' p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
replace b = _b[p_air_complaint_inv] if Years == `h'
replace se = _se[p_air_complaint_inv] if Years == `h'
replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == `h'
replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == `h'
}
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catC_neg`h'  p_air_complaint_inv, absorb(RN_year t) cluster(RN_id)
replace b = _b[p_air_complaint_inv] if Years == -`h'
replace se = _se[p_air_complaint_inv] if Years == -`h'
replace u = (_b[p_air_complaint_inv] + 1.96*_se[p_air_complaint_inv]) if Years == -`h'
replace d = (_b[p_air_complaint_inv] - 1.96*_se[p_air_complaint_inv]) if Years == -`h'
}

* Keep if years is included
keep if Years != .
keep b u d se Years Zero

*
export delimited "$point_estimates/Point_Estimates_Figure_6_Panel_E.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(black) lpattern(solid) lwidth(thin)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.01)0.06, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Category C Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

graph export "$figures/Figure_6_Panel_E.pdf", replace

//Cat A as outcome, non-complaint onsite air inv as treatment
clear
use "$processed_data/Panel_inv_types_onsite_included_final.dta", clear
cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0

* Run regressions for association between likelihood of a category A violation before and after a non-complaint

foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catA_`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id )
unique t if e(sample)
replace b = _b[p_air_noncomp_onsite] if Years == `h'
replace se = _se[p_air_noncomp_onsite] if Years == `h'
replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
}
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catA_neg`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
unique t if e(sample)
replace b = _b[p_air_noncomp_onsite] if Years == -`h'
replace se = _se[p_air_noncomp_onsite] if Years == -`h'
replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
}

* Keep if years is included
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_6_Panel_B.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(black) lpattern(solid) lwidth(thin)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.01)0.04, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Category A Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)


* Create graph
graph export "$figures/Figure_6_Panel_B.pdf", replace

//Cat B as outcome, non-complaint onsite air inv as treatment
use "$processed_data/Panel_inv_types_onsite_included_final.dta", clear
cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0

* Run regressions for association between likelihood of a category B violation before and after a non-complaint
foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catB_`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id )
unique t if e(sample)
replace b = _b[p_air_noncomp_onsite] if Years == `h'
replace se = _se[p_air_noncomp_onsite] if Years == `h'
replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
}
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catB_neg`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
unique t if e(sample)
replace b = _b[p_air_noncomp_onsite] if Years == -`h'
replace se = _se[p_air_noncomp_onsite] if Years == -`h'
replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
}
keep if Years != .
keep b u d se Years Zero
export delimited "$point_estimates/Point_Estimates_Figure_6_Panel_D.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(black) lpattern(solid) lwidth(thin)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.1)0.4, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Category B Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

graph export "$figures/Figure_6_Panel_D.pdf", replace

* Panel F

* Load in data
use "$processed_data/Panel_inv_types_onsite_included_final.dta", clear
cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0

* Run regressions for association between likelihood of a category C violation before and after a non-complaint
foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catC_`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id )
unique t if e(sample)
replace b = _b[p_air_noncomp_onsite] if Years == `h'
replace se = _se[p_air_noncomp_onsite] if Years == `h'
replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == `h'
}
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_catC_neg`h'  p_air_noncomp_onsite, absorb(RN_year t) cluster(RN_id)
unique t if e(sample)
replace b = _b[p_air_noncomp_onsite] if Years == -`h'
replace se = _se[p_air_noncomp_onsite] if Years == -`h'
replace u = (_b[p_air_noncomp_onsite] + 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
replace d = (_b[p_air_noncomp_onsite] - 1.96*_se[p_air_noncomp_onsite]) if Years == -`h'
}

* Keep if years are included
keep if Years != .
keep b u d se Years Zero

* Export point estimates
export delimited "$point_estimates/Point_Estimates_Figure_6_Panel_F.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(gs10) fint(inten30) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(black) lpattern(solid) lwidth(thin)) ///
    (line Zero Years, lcolor(gs8)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.01)0.06, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Category C Violation)", size(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)


* Create figures
graph export "$figures/Figure_6_Panel_F.pdf", replace